import pandas as pd
import plotly.graph_objects as go
import json
# Load the network description. The context manager closes the file handle,
# which a bare open() passed straight to json.load() would leave open.
with open("network.json") as network_file:
    network = json.load(network_file)

node_df = pd.DataFrame(network["nodes"])
# Node labels in sorted order; a label's position in this list is the integer
# id written to the *_index columns below.
nodes = sorted(node_df.name)

# Label -> position lookup, built once so that each edge endpoint resolves
# with a dict access instead of a linear nodes.index() scan per row.
# Iterating in reverse means the lowest position is written last, so a
# repeated label keeps its first position -- the one nodes.index() returns.
node_index = {
    label: position
    for position, label in reversed(list(enumerate(nodes)))
}

edge_df = pd.DataFrame(network["links"])
# An endpoint that is absent from the node list raises KeyError here, so a
# bad edge still fails loudly rather than yielding a missing index.
edge_df["source_index"] = edge_df.source.apply(lambda label: node_index[label])
edge_df["target_index"] = edge_df.target.apply(lambda label: node_index[label])

# Edges of interest
eoi = edge_df[edge_df.val >= 50]
eoi
| | source | target | val | source_index | target_index |
|---|---|---|---|---|---|
| 3 | The University of Auckland | The University of Auckland | 61 | 8585 | 8585 |
| 18 | PwC New Zealand | PwC New Zealand | 998 | 6805 | 6805 |
| 42 | Deloitte New Zealand | Deloitte New Zealand | 373 | 2316 | 2316 |
| 59 | PwC | PwC | 108 | 6754 | 6754 |
| 60 | PwC New Zealand | PwC | 62 | 6805 | 6754 |
| 76 | ASB Bank | ASB Bank | 66 | 189 | 189 |
| 130 | KPMG New Zealand | KPMG New Zealand | 814 | 4520 | 4520 |
| 221 | PwC | PwC New Zealand | 105 | 6754 | 6805 |
| 268 | Deloitte New Zealand | Deloitte | 98 | 2316 | 2268 |
| 356 | ANZ | ANZ | 72 | 153 | 153 |
| 418 | Deloitte | Deloitte | 231 | 2268 | 2268 |
| 437 | EY | EY | 848 | 2564 | 2564 |
| 627 | Bank of New Zealand | Bank of New Zealand | 62 | 1022 | 1022 |
| 642 | Fonterra | Fonterra | 72 | 3073 | 3073 |
# Sankey diagram of the edges of interest: `nodes` supplies the node labels,
# and each selected edge becomes a link between the positions held in its
# source_index / target_index columns, with val as the link value.
sankey_nodes = {"label": nodes}
sankey_links = {
    "source": eoi.source_index,
    "target": eoi.target_index,
    "value": eoi.val,
}
sankey_trace = go.Sankey(node=sankey_nodes, link=sankey_links)

fig = go.Figure(data=[sankey_trace])
fig.update_layout(title_text="LinkedIn workforce flows to and from KPMG, Deloitte, PwC and EY in NZ")
fig.show()
# Read the "Edges (Industries)" worksheet of the LinkedIn workbook into df,
# then display it.
df = pd.read_excel(
    io="linkedin.xlsx",
    sheet_name="Edges (Industries)",
)
df
| | source | target | val |
|---|---|---|---|
| 0 | Management Consulting | Management Consulting | 3064 |
| 1 | Accounting | Accounting | 1568 |
| 2 | Accounting | Management Consulting | 679 |
| 3 | Information Technology and Services | Information Technology and Services | 533 |
| 4 | Banking | Banking | 469 |
| ... | ... | ... | ... |
| 2749 | Food & Beverages | Machinery | 1 |
| 2750 | Building Materials | Real Estate | 1 |
| 2751 | Renewables & Environment | Utilities | 1 |
| 2752 | Hospital & Health Care | Supermarkets | 1 |
| 2753 | Real Estate | Banking | 1 |
2754 rows × 3 columns
# Sorted list of every distinct value found in the source or target column
# of df.
endpoint_names = pd.concat([df.source, df.target], ignore_index=True)
industries = sorted(endpoint_names.unique())
industries
['Accounting', 'Airlines/Aviation', 'Alternative Dispute Resolution', 'Alternative Medicine', 'Apparel & Fashion', 'Architecture & Planning', 'Automotive', 'Aviation & Aerospace', 'Banking', 'Biotechnology', 'Broadcast Media', 'Building Materials', 'Business Supplies and Equipment', 'Capital Markets', 'Chemicals', 'Civic & Social Organization', 'Civil Engineering', 'Commercial Real Estate', 'Computer & Network Security', 'Computer Games', 'Computer Hardware', 'Computer Networking', 'Computer Software', 'Construction', 'Consumer Electronics', 'Consumer Goods', 'Consumer Services', 'Cosmetics', 'Dairy', 'Defense & Space', 'Design', 'E-Learning', 'Education Management', 'Electrical/Electronic Manufacturing', 'Entertainment', 'Environmental Services', 'Events Services', 'Executive Office', 'Facilities Services', 'Farming', 'Financial Services', 'Fine Art', 'Fishery', 'Food & Beverages', 'Food Production', 'Fund-Raising', 'Furniture', 'Gambling & Casinos', 'Glass, Ceramics & Concrete', 'Government Administration', 'Government Relations', 'Graphic Design', 'Health, Wellness and Fitness', 'Higher Education', 'Hospital & Health Care', 'Hospitality', 'Human Resources', 'Import and Export', 'Individual & Family Services', 'Industrial Automation', 'Information Services', 'Information Technology and Services', 'Insurance', 'International Affairs', 'International Trade and Development', 'Internet', 'Investment Banking', 'Investment Management', 'Law Enforcement', 'Law Practice', 'Legal Services', 'Legislative Office', 'Leisure, Travel & Tourism', 'Logistics and Supply Chain', 'Luxury Goods & Jewelry', 'Machinery', 'Management Consulting', 'Maritime', 'Market Research', 'Marketing and Advertising', 'Mechanical or Industrial Engineering', 'Media Production', 'Medical Devices', 'Medical Practice', 'Mental Health Care', 'Military', 'Mining & Metals', 'Mobile Games', 'Motion Pictures and Film', 'Museums and Institutions', 'Music', 'Newspapers', 'Nonprofit Organization Management', 
'Oil & Energy', 'Online Media', 'Outsourcing/Offshoring', 'Package/Freight Delivery', 'Packaging and Containers', 'Paper & Forest Products', 'Performing Arts', 'Pharmaceuticals', 'Philanthropy', 'Photography', 'Plastics', 'Political Organization', 'Primary/Secondary Education', 'Printing', 'Professional Training & Coaching', 'Program Development', 'Public Policy', 'Public Relations and Communications', 'Public Safety', 'Publishing', 'Real Estate', 'Recreational Facilities and Services', 'Religious Institutions', 'Renewables & Environment', 'Research', 'Restaurants', 'Retail', 'Security and Investigations', 'Semiconductors', 'Sporting Goods', 'Sports', 'Staffing and Recruiting', 'Supermarkets', 'Telecommunications', 'Textiles', 'Think Tanks', 'Tobacco', 'Translation and Localization', 'Transportation/Trucking/Railroad', 'Utilities', 'Venture Capital & Private Equity', 'Veterinary', 'Wholesale', 'Wine and Spirits', 'Wireless', 'Writing and Editing']
# Industry name -> position in `industries`, built once so each row resolves
# with a dict access instead of a linear industries.index() scan.
# NOTE: this matches industries.index() as long as `industries` holds each
# name only once (it is built from unique values where it is defined).
industry_index = {name: position for position, name in enumerate(industries)}

df["source_index"] = df.source.apply(lambda name: industry_index[name])
df["target_index"] = df.target.apply(lambda name: industry_index[name])
df.head(50)
| | source | target | val | source_index | target_index |
|---|---|---|---|---|---|
| 0 | Management Consulting | Management Consulting | 3064 | 76 | 76 |
| 1 | Accounting | Accounting | 1568 | 0 | 0 |
| 2 | Accounting | Management Consulting | 679 | 0 | 76 |
| 3 | Information Technology and Services | Information Technology and Services | 533 | 61 | 61 |
| 4 | Banking | Banking | 469 | 8 | 8 |
| 5 | Management Consulting | Accounting | 409 | 76 | 0 |
| 6 | Higher Education | Management Consulting | 315 | 53 | 76 |
| 7 | Information Technology and Services | Management Consulting | 304 | 61 | 76 |
| 8 | Government Administration | Government Administration | 255 | 49 | 49 |
| 9 | Financial Services | Financial Services | 244 | 40 | 40 |
| 10 | Financial Services | Management Consulting | 243 | 40 | 76 |
| 11 | Higher Education | Higher Education | 219 | 53 | 53 |
| 12 | Management Consulting | Banking | 207 | 76 | 8 |
| 13 | Management Consulting | Information Technology and Services | 198 | 76 | 61 |
| 14 | Government Administration | Management Consulting | 177 | 49 | 76 |
| 15 | Management Consulting | Financial Services | 167 | 76 | 40 |
| 16 | Telecommunications | Telecommunications | 165 | 126 | 126 |
| 17 | Banking | Management Consulting | 163 | 8 | 76 |
| 18 | Higher Education | Accounting | 152 | 53 | 0 |
| 19 | Management Consulting | Higher Education | 131 | 76 | 53 |
| 20 | Computer Software | Computer Software | 130 | 22 | 22 |
| 21 | Law Practice | Law Practice | 120 | 69 | 69 |
| 22 | Management Consulting | Government Administration | 119 | 76 | 49 |
| 23 | Insurance | Insurance | 115 | 62 | 62 |
| 24 | Retail | Management Consulting | 111 | 119 | 76 |
| 25 | Financial Services | Accounting | 109 | 40 | 0 |
| 26 | Information Technology and Services | Accounting | 108 | 61 | 0 |
| 27 | Accounting | Higher Education | 100 | 0 | 53 |
| 28 | Retail | Retail | 96 | 119 | 119 |
| 29 | Utilities | Utilities | 91 | 132 | 132 |
| 30 | Accounting | Financial Services | 90 | 0 | 40 |
| 31 | Education Management | Management Consulting | 89 | 32 | 76 |
| 32 | Banking | Accounting | 89 | 8 | 0 |
| 33 | Accounting | Banking | 88 | 0 | 8 |
| 34 | Dairy | Dairy | 83 | 28 | 28 |
| 35 | Management Consulting | Computer Software | 75 | 76 | 22 |
| 36 | Accounting | Information Technology and Services | 75 | 0 | 61 |
| 37 | Law Practice | Management Consulting | 74 | 69 | 76 |
| 38 | Government Administration | Accounting | 69 | 49 | 0 |
| 39 | Airlines/Aviation | Airlines/Aviation | 63 | 1 | 1 |
| 40 | Hospital & Health Care | Hospital & Health Care | 63 | 54 | 54 |
| 41 | Nonprofit Organization Management | Management Consulting | 62 | 92 | 76 |
| 42 | Food & Beverages | Food & Beverages | 62 | 43 | 43 |
| 43 | Retail | Accounting | 60 | 119 | 0 |
| 44 | Insurance | Management Consulting | 60 | 62 | 76 |
| 45 | Financial Services | Banking | 60 | 40 | 8 |
| 46 | Management Consulting | Law Practice | 60 | 76 | 69 |
| 47 | Management Consulting | Insurance | 59 | 76 | 62 |
| 48 | Telecommunications | Management Consulting | 55 | 126 | 76 |
| 49 | Banking | Financial Services | 55 | 8 | 40 |
# Keep only the rows of df whose val is strictly greater than 100.
eoi = df.loc[df.val > 100]

# Sankey diagram of the selected rows: `industries` supplies the node labels,
# and the source_index / target_index columns give each link's endpoints,
# with val as the link value.
industry_nodes = {"label": industries}
industry_links = {
    "source": eoi.source_index,
    "target": eoi.target_index,
    "value": eoi.val,
}
industry_trace = go.Sankey(node=industry_nodes, link=industry_links)

fig = go.Figure(data=[industry_trace])
fig.update_layout(title_text="LinkedIn workforce flows to and from KPMG, Deloitte, PwC and EY in NZ")
fig.show()